# Import the Data File
library(readr)
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2   3.5.1      ✔ fma       2.5   
## ✔ forecast  8.24.0     ✔ expsmooth 2.3
## Warning: package 'forecast' was built under R version 4.4.1
## 
library(TTR)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Natural_Gas_Prices <- read_csv("natural_gas_prices_monthly.csv")
## Rows: 284 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Month
## dbl (1): Price
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View(Natural_Gas_Prices)

My_Data <- Natural_Gas_Prices
attributes(My_Data)
## $row.names
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284
## 
## $names
## [1] "Month" "Price"
## 
## $spec
## cols(
##   Month = col_character(),
##   Price = col_double()
## )
## 
## $problems
## <pointer: 0x13f66df80>
## 
## $class
## [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame"
My_Data$Month <- as.yearmon(My_Data$Month,"%Y-%m")

start_year <- year(min(My_Data$Month))
start_month <- month(min(My_Data$Month))
end_year <- year(max(My_Data$Month))
end_month <- month(max(My_Data$Month))

series <- ts(My_Data$Price, start = c(start_year,start_month), frequency = 12)
attributes(series)
## $tsp
## [1] 1997.000 2020.583   12.000
## 
## $class
## [1] "ts"
# Plot and Inference

plot(series,
     main = paste("Time Series Plot for Natural Gas Prices"),
     xlab = "Year",
     ylab = "Natural Gas Prices",
     col = "black",
     lwd = 2)

# The natural gas prices seem to spike every few years in a regular time interval. However, the intensity of the spikes vary throughout the years. Around 2001, a bigger spike is observed because this was the time when US experienced the most frigid winter in approximately 90 years, therefore there was an increased demand for heating which was not possible to be compensated by low production of natural gas leading up to 2001. Around 2005, there is a price spike after hurricanes Katrina and Rita. The natural disasters led to production disruptions, higher demand and supply shortages of natural gas. Last higher spike around 2009 was due to a combination of economic recession and financial speculation. 

acf(series, 
    main = paste("Autocorrelogram Plot for Natural Gas Prices"))

# The ACF trend shows that the plot declines at a faster pace a little prior to hitting lag 1.0. However, after that point the plot declines at a slower rate but it will eventually move below the significant line. Based on the plot, there are no repeating spikes or regular cyclical patterns indicating no specific evidence of seasonality in this dataset.

# Central Tendency
summary(series)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.630   2.660   3.560   4.208   5.327  13.420
boxplot(series,
        main = "Boxplot For Natural Gas Prices",
        ylab = "Value")

# The median of the boxplot is 3.560, however since the median line is closer to the bottom of the box, it proves that the distribution is right (positive)-skewed. The IQR of this boxplot is 2.660 which exceeds one-third of the median value indicating that there is high variability in the data set. There are many outliers towards higher values coinciding with the spikes observed in time series plot. 

# Naive Method
naive_model <- naive(series)
plot(naive_model)

checkresiduals(naive_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from Naive method
## Q* = 41.408, df = 24, p-value = 0.01502
## 
## Model df: 0.   Total lags used: 24
naive_residuals <- residuals(naive_model)
naive_residuals
##        Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov   Dec
## 1997    NA -1.30 -0.26  0.14  0.22 -0.05 -0.01  0.30  0.39  0.19 -0.06 -0.66
## 1998 -0.26  0.14  0.01  0.19 -0.29  0.03  0.00 -0.32  0.17 -0.11  0.21 -0.40
## 1999  0.13 -0.08  0.02  0.36  0.11  0.04  0.01  0.49 -0.25  0.18 -0.36 -0.01
## 2000  0.06  0.24  0.13  0.25  0.55  0.70 -0.30  0.44  0.63 -0.04  0.50  3.38
## 2001 -0.73 -2.56 -0.38 -0.04 -1.00 -0.47 -0.61 -0.14 -0.78  0.27 -0.12 -0.04
## 2002  0.02  0.00  0.71  0.40  0.07 -0.24 -0.27  0.10  0.46  0.58 -0.09  0.70
## 2003  0.69  2.28 -1.78 -0.67  0.55  0.01 -0.79 -0.04 -0.37  0.01 -0.16  1.66
## 2004  0.01 -0.77  0.02  0.32  0.62 -0.06 -0.34 -0.52 -0.26  1.20 -0.18  0.41
## 2005 -0.43 -0.01  0.82  0.20 -0.69  0.71  0.45  1.90  2.22  1.67 -3.12  2.75
## 2006 -4.36 -1.15 -0.65  0.27 -0.91 -0.04 -0.04  0.97 -2.24  0.95  1.56 -0.68
## 2007 -0.18  1.45 -0.89  0.49  0.04 -0.29 -1.13  0.00 -0.14  0.66  0.36  0.01
## 2008  0.88  0.55  0.87  0.77  1.09  1.42 -1.60 -2.83 -0.59 -0.93 -0.06 -0.86
## 2009 -0.58 -0.72 -0.56 -0.46  0.33 -0.03 -0.42 -0.24 -0.15  1.02 -0.35  1.69
## 2010  0.48 -0.51 -1.03 -0.26  0.11  0.66 -0.17 -0.31 -0.43 -0.46  0.28  0.54
## 2011  0.24 -0.40 -0.12  0.27  0.07  0.23 -0.12 -0.36 -0.16 -0.33 -0.33 -0.07
## 2012 -0.50 -0.16 -0.34 -0.22  0.48  0.03  0.49 -0.11  0.01  0.47  0.22 -0.20
## 2013 -0.01  0.00  0.48  0.36 -0.13 -0.21 -0.21 -0.19  0.19  0.06 -0.04  0.60
## 2014  0.47  1.29 -1.10 -0.24 -0.08  0.01 -0.54 -0.14  0.01 -0.14  0.34 -0.64
## 2015 -0.49 -0.12 -0.04 -0.22  0.24 -0.07  0.06 -0.07 -0.11 -0.32 -0.25 -0.16
## 2016  0.35 -0.29 -0.26  0.19  0.00  0.67  0.23  0.00  0.17 -0.01 -0.43  1.04
## 2017 -0.29 -0.45  0.03  0.22  0.05 -0.17  0.00 -0.08  0.08 -0.10  0.13 -0.19
## 2018  1.05 -1.20  0.02  0.11  0.00  0.17 -0.14  0.13  0.04  0.28  0.81 -0.05
## 2019 -0.93 -0.42  0.26 -0.30 -0.01 -0.24 -0.03 -0.15  0.34 -0.23  0.32 -0.43
## 2020 -0.20 -0.11 -0.12 -0.05  0.01 -0.12  0.14  0.53
plot(naive_residuals,
     main = "Residuals from Naive Forecast for Natural Gas Prices",
     ylab = "Residuals", xlab = "Time",
     col = "blue", type = "l")
abline(h = 0, col = "red")

# The plot indicates that the model captured the main pattern of the model because there are random fluctuations around 0 except for the few years that had extremely high spikes. 

hist(naive_residuals,
     main = "Histogram of Residuals",
     xlab = "Residuals",
     col = "lightblue")

# The histogram roughly forms a bell-shaped curve centered around 0, however it is slightly higher left of the zero, indicating the histogram is slightly skewed to the right.

plot(fitted(naive_model), residuals(naive_model),
     main = "Residuals vs Fitted Values",
     xlab = "Fitted Values",
     ylab = "Residuals",
     col = "steelblue", pch = 19)
abline(h = 0, col = "red", lwd = 2)

# The plot forms a sideways fan-shaped, where the low fitted values are tightly clustered around 0 and high fitted values are spread out both negatively and positively. This basically indicates that the variance of residuals is not constant because there is small variance at lower fitted values and large variance at higher fitted values.

plot(series, residuals(naive_model),
     main = "Residuals vs Actual Values",
     xlab = "Actual Values",
     ylab = "Residuals",
     col = "navy", pch = 19)
abline(h = 0, col = "red", lwd = 2)

# This plot also fans sideways, where the variance increases with higher actual values indicating smaller variance at lower actual values and larger variance at higher actual values.

Acf(naive_residuals)

# ACF plot has spikes between the blue bands except for 2 spikes, this indicates that the residuals are uncorrelated, therefore the model is an adequate fit.

accuracy(naive_model)
##                        ME     RMSE       MAE       MPE     MAPE      MASE
## Training set -0.004063604 0.738552 0.4525442 -1.029336 9.927873 0.3122657
##                     ACF1
## Training set -0.03283981
naive_forecast <- naive(series,12)
naive_forecast
##          Point Forecast      Lo 80    Hi 80      Lo 95    Hi 95
## Sep 2020            2.3  1.3535076 3.246492  0.8524647 3.747535
## Oct 2020            2.3  0.9614576 3.638542  0.2528760 4.347124
## Nov 2020            2.3  0.6606270 3.939373 -0.2072046 4.807205
## Dec 2020            2.3  0.4070151 4.192985 -0.5950705 5.195071
## Jan 2021            2.3  0.1835786 4.416421 -0.9367872 5.536787
## Feb 2021            2.3 -0.0184235 4.618424 -1.2457228 5.845723
## Mar 2021            2.3 -0.2041836 4.804184 -1.5298183 6.129818
## Apr 2021            2.3 -0.3770849 4.977085 -1.7942480 6.394248
## May 2021            2.3 -0.5394773 5.139477 -2.0426058 6.642606
## Jun 2021            2.3 -0.6930719 5.293072 -2.2775084 6.877508
## Jul 2021            2.3 -0.8391603 5.439160 -2.5009313 7.100931
## Aug 2021            2.3 -0.9787460 5.578746 -2.7144092 7.314409
plot(naive_forecast)

# The MAPE for this type of forecast is 9.927873 which is less than 10%, so this is a good indicator that the naive forecast is performing well and the data does not have extremely small values that distort the percentages.

# In 1 year, the time series value for the natural gas prices will be $2.30.

# The forecast is taking the last value and predicting the values for the next whole year.

# Simple Moving Averages
plot(series,
     main = paste("Time Series Plot for Natural Gas Prices"),
     xlab = "Year",
     ylab = "Natural Gas Prices",
     col = "black")

MA3_forecast <- ma(series,order=3)
lines(MA3_forecast, col = "red")

MA6_forecast <- ma(series,order=6)
lines(MA6_forecast, col = "blue")

MA9_forecast <- ma(series,order=9)
lines(MA9_forecast, col = "green")

# It is observed that the moving average of the order 3 is the best fit that is closest to the data set plot. So in forecasting the next 12 months, we will be using that fit.

# Moving Average with the Order = 3
MA3_model <- ma(series, order = 3, centre = FALSE)
MA3_f <- forecast(MA3_model, h = 12)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
MA3_f
##          Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
## Aug 2020       1.910662 1.731994 2.089330 1.6374132 2.183911
## Sep 2020       1.919208 1.648457 2.189959 1.5051297 2.333286
## Oct 2020       1.926045 1.574912 2.277178 1.3890331 2.463057
## Nov 2020       1.931514 1.506165 2.356863 1.2809992 2.582029
## Dec 2020       1.935890 1.440671 2.431108 1.1785182 2.693261
## Jan 2021       1.939390 1.377798 2.500983 1.0805089 2.798272
## Feb 2021       1.942191 1.317218 2.567163 0.9863772 2.898004
## Mar 2021       1.944431 1.258718 2.630144 0.8957237 2.993138
## Apr 2021       1.946223 1.202134 2.690312 0.8082368 3.084209
## May 2021       1.947657 1.147322 2.747992 0.7236504 3.171663
## Jun 2021       1.948804 1.094152 2.803456 0.6417267 3.255881
## Jul 2021       1.949721 1.042502 2.856941 0.5622483 3.337195
plot(MA3_f)

# Based on the moving average of order 3 forecast, the natural gas price in July 2021 will be $1.95.

accuracy(MA3_f)
##                        ME      RMSE       MAE        MPE     MAPE      MASE
## Training set -0.006148345 0.3909769 0.2494662 -0.2500108 5.594669 0.1779877
##                   ACF1
## Training set 0.5576545
MA6_model <- ma(series, order = 3, centre = FALSE)
MA6_f <- forecast(MA6_model, h = 12)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
MA6_f
##          Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
## Aug 2020       1.910662 1.731994 2.089330 1.6374132 2.183911
## Sep 2020       1.919208 1.648457 2.189959 1.5051297 2.333286
## Oct 2020       1.926045 1.574912 2.277178 1.3890331 2.463057
## Nov 2020       1.931514 1.506165 2.356863 1.2809992 2.582029
## Dec 2020       1.935890 1.440671 2.431108 1.1785182 2.693261
## Jan 2021       1.939390 1.377798 2.500983 1.0805089 2.798272
## Feb 2021       1.942191 1.317218 2.567163 0.9863772 2.898004
## Mar 2021       1.944431 1.258718 2.630144 0.8957237 2.993138
## Apr 2021       1.946223 1.202134 2.690312 0.8082368 3.084209
## May 2021       1.947657 1.147322 2.747992 0.7236504 3.171663
## Jun 2021       1.948804 1.094152 2.803456 0.6417267 3.255881
## Jul 2021       1.949721 1.042502 2.856941 0.5622483 3.337195
plot(MA6_f)

accuracy(MA6_f)
##                        ME      RMSE       MAE        MPE     MAPE      MASE
## Training set -0.006148345 0.3909769 0.2494662 -0.2500108 5.594669 0.1779877
##                   ACF1
## Training set 0.5576545
MA9_model <- ma(series, order = 3, centre = FALSE)
MA9_f <- forecast(MA9_model, h = 12)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
MA9_f
##          Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
## Aug 2020       1.910662 1.731994 2.089330 1.6374132 2.183911
## Sep 2020       1.919208 1.648457 2.189959 1.5051297 2.333286
## Oct 2020       1.926045 1.574912 2.277178 1.3890331 2.463057
## Nov 2020       1.931514 1.506165 2.356863 1.2809992 2.582029
## Dec 2020       1.935890 1.440671 2.431108 1.1785182 2.693261
## Jan 2021       1.939390 1.377798 2.500983 1.0805089 2.798272
## Feb 2021       1.942191 1.317218 2.567163 0.9863772 2.898004
## Mar 2021       1.944431 1.258718 2.630144 0.8957237 2.993138
## Apr 2021       1.946223 1.202134 2.690312 0.8082368 3.084209
## May 2021       1.947657 1.147322 2.747992 0.7236504 3.171663
## Jun 2021       1.948804 1.094152 2.803456 0.6417267 3.255881
## Jul 2021       1.949721 1.042502 2.856941 0.5622483 3.337195
plot(MA9_f)

accuracy(MA9_f)
##                        ME      RMSE       MAE        MPE     MAPE      MASE
## Training set -0.006148345 0.3909769 0.2494662 -0.2500108 5.594669 0.1779877
##                   ACF1
## Training set 0.5576545
# As the moving average order goes up, the line graph becomes smoother indicating a departure from the original time series. This indicates that the error in the graph increases as the order goes up. 

# Simple Smoothing
ses_model <- ses(series,12)
plot(ses_model)

summary(ses_model)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
## ses(y = series, h = 12)
## 
##   Smoothing parameters:
##     alpha = 0.9682 
## 
##   Initial states:
##     l = 3.4084 
## 
##   sigma:  0.7395
## 
##      AIC     AICc      BIC 
## 1436.871 1436.957 1447.818 
## 
## Error measures:
##                      ME      RMSE       MAE      MPE     MAPE      MASE
## Training set -0.0040926 0.7368673 0.4534637 -1.05908 9.947884 0.3129002
##                      ACF1
## Training set -0.000732753
## 
## Forecasts:
##          Point Forecast       Lo 80    Hi 80      Lo 95    Hi 95
## Sep 2020       2.283018  1.33534203 3.230694  0.8336726 3.732364
## Oct 2020       2.283018  0.96392686 3.602110  0.2656421 4.300394
## Nov 2020       2.283018  0.67618472 3.889852 -0.1744215 4.740458
## Dec 2020       2.283018  0.43265994 4.133376 -0.5468606 5.112897
## Jan 2021       2.283018  0.21765200 4.348384 -0.8756868 5.441723
## Feb 2021       2.283018  0.02300728 4.543029 -1.1733702 5.739407
## Mar 2021       2.283018 -0.15615404 4.722190 -1.4473738 6.013410
## Apr 2021       2.283018 -0.32302729 4.889064 -1.7025844 6.268621
## May 2021       2.283018 -0.47983991 5.045876 -1.9424086 6.508445
## Jun 2021       2.283018 -0.62821810 5.194255 -2.1693335 6.735370
## Jul 2021       2.283018 -0.76939210 5.335429 -2.3852405 6.951277
## Aug 2021       2.283018 -0.90431932 5.470356 -2.5915939 7.157630
# The value of alpha is 0.9682. Since the alpha is closer to 1 indicating that the model reacts quickly to recent changes and therefore the most weight is on the latest observed values.

# The initial state value is 3.4084.

# The value of sigma is 0.7395. Since sigma is closer to 1, the forecast does have larger errors and it cannot be accepted that the model fits the data well. There is going to be high variability around predicted values therefore the predictions are less accurate.

checkresiduals(ses_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from Simple exponential smoothing
## Q* = 40.313, df = 24, p-value = 0.01979
## 
## Model df: 0.   Total lags used: 24
ses_residuals <- residuals(ses_model)
ses_residuals
##                Jan           Feb           Mar           Apr           May
## 1997  0.0416229595 -1.2986771573 -0.3012739909  0.1304250414  0.2241451118
## 1998 -0.2810299293  0.1310684294  0.0141655596  0.1904502036 -0.2839471947
## 1999  0.1174961206 -0.0762657895  0.0175761539  0.3605585977  0.1214591161
## 2000  0.0593240991  0.2418854126  0.1376874967  0.2543759240  0.5580844646
## 2001 -0.6220739079 -2.5797704816 -0.4619891401 -0.0546827373 -1.0017379029
## 2002  0.0186153958  0.0005916264  0.7100188028  0.4225655079  0.0834297926
## 2003  0.7121752797  2.3026340441 -1.7068186893 -0.7242453670  0.5269823497
## 2004  0.0625957264 -0.7680106099 -0.0044085782  0.3198598885  0.6301656474
## 2005 -0.4171131317 -0.0232565076  0.8192608718  0.2260373916 -0.6828161782
## 2006 -4.2756961746 -1.2858883106 -0.6908675413  0.2480431492 -0.9021168008
## 2007 -0.2000075151  1.4436434484 -0.8441187537  0.4631725860  0.0547203491
## 2008  0.8807024830  0.5779901021  0.8883694293  0.7982337697  1.1153691175
## 2009 -0.6074232986 -0.7393048623 -0.5834962646 -0.4785444237  0.3147911099
## 2010  0.5333899014 -0.4930480348 -1.0456698376 -0.2932330226  0.1006805960
## 2011  0.2574296478 -0.3918184832 -0.1324526041  0.2657904491  0.0784472361
## 2012 -0.5025688004 -0.1759724224 -0.3455926788 -0.2309834758  0.4726589839
## 2013 -0.0161190030 -0.0005122871  0.4799837187  0.3752546332 -0.1180738209
## 2014  0.4890306491  1.3055421588 -1.0585078286 -0.2736410341 -0.0886967400
## 2015 -0.5100012669 -0.1362086378 -0.0443289235 -0.2214088425  0.2329632808
## 2016  0.3446520459 -0.2790464190 -0.2688685316  0.1814549376  0.0057669217
## 2017 -0.2573816501 -0.4581799913  0.0154383234  0.2204906541  0.0570075378
## 2018  1.0440896806 -1.1668171972 -0.0170832752  0.1094570668  0.0034787167
## 2019 -0.9307618900 -0.4495810684  0.2457116106 -0.2921909008 -0.0192862837
## 2020 -0.2133498921 -0.1167805932 -0.1237114699 -0.0539317440  0.0082859648
##                Jun           Jul           Aug           Sep           Oct
## 1997 -0.0428763178 -0.0113626764  0.2996388764  0.3995229921  0.2026974654
## 1998  0.0209757141  0.0006666410 -0.3199788131  0.1598305729 -0.1049203345
## 1999  0.0438601606  0.0113939445  0.4903621174 -0.2344155251  0.1725499080
## 2000  0.7177367970 -0.2771892022  0.4311904937  0.6437039082 -0.0195420832
## 2001 -0.5018367970 -0.6259491582 -0.1598936431 -0.7850816700  0.2450488768
## 2002 -0.2373484707 -0.2775433056  0.0911792397  0.4628978188  0.5947116165
## 2003  0.0267483232 -0.7891498965 -0.0650804178 -0.3720683574 -0.0018249142
## 2004 -0.0399723503 -0.3412703838 -0.5308461065 -0.2768711194  1.1912006029
## 2005  0.6882990341  0.4718752196  1.9149969324  2.2808615971  1.7424893483
## 2006 -0.0686706826 -0.0421824617  0.9686593754 -2.2092144903  0.8797877088
## 2007 -0.2882609017 -1.1391613822 -0.0362043301 -0.1411506302  0.6555140122
## 2008  1.4554481747 -1.5537435810 -2.8793804007 -0.6815112118 -0.9516594920
## 2009 -0.0199954463 -0.4206354866 -0.2533684535 -0.1580524457  1.0149768461
## 2010  0.6631997868 -0.1489224737 -0.3147329891 -0.4400027066 -0.4739839741
## 2011  0.2324931758 -0.1126110033 -0.3635789538 -0.1715551077 -0.3354522896
## 2012  0.0450218416  0.4914308645 -0.0943815586  0.0070004065  0.4702224839
## 2013 -0.2137525707 -0.2167933910 -0.1968900330  0.1837425269  0.0658396248
## 2014  0.0071810789 -0.5397717741 -0.1571547911  0.0050053750 -0.1398409214
## 2015 -0.0625960626  0.0580105992 -0.0681563324 -0.1121661148 -0.3235648145
## 2016  0.6701832818  0.2512994727  0.0079866902  0.1702538295 -0.0045890671
## 2017 -0.1681882113 -0.0053452844 -0.0801698815  0.0774520758 -0.0975384519
## 2018  0.1701105591 -0.1345936204  0.1257224043  0.0439956546  0.2813982507
## 2019 -0.2406129483 -0.0376470558 -0.1511964823  0.3351947394 -0.2193469870
## 2020 -0.1197366591  0.1361945817  0.5343284768                            
##                Nov           Dec
## 1997 -0.0535579576 -0.6617021556
## 1998  0.2066654677 -0.3934318483
## 1999 -0.3545160941 -0.0212670759
## 2000  0.4993789220  3.3958710431
## 2001 -0.1122119635 -0.0435662717
## 2002 -0.0710991348  0.6977403583
## 2003 -0.1600579986  1.6549131065
## 2004 -0.1421417821  0.4054825119
## 2005 -3.0646209637  2.6526015534
## 2006  1.5879610292 -0.6295321153
## 2007  0.3808332604  0.0221034766
## 2008 -0.0902452268 -0.8628681344
## 2009 -0.3177424486  1.6799016481
## 2010  0.2649360481  0.5484200819
## 2011 -0.3406611983 -0.0808267456
## 2012  0.2349444059 -0.1925330994
## 2013 -0.0379075138  0.5987952399
## 2014  0.3355556368 -0.6293355171
## 2015 -0.2602833958 -0.1682722133
## 2016 -0.4301458477  1.0263292923
## 2017  0.1269000755 -0.1859669172
## 2018  0.8189432764 -0.0239727020
## 2019  0.3130288097 -0.4200514549
## 2020
plot(ses_residuals,
     main = "Residuals from Simple Exponential Smoothing Forecast for Natural Gas Prices",
     ylab = "Residuals", xlab = "Time",
     col = "blue", type = "l")
abline(h = 0, col = "red")

# The plot indicates that the model captured the main pattern of the data because there are random fluctuations around 0 except for the few years that had extremely high spikes. 

hist(ses_residuals,
     main = "Histogram of Residuals",
     xlab = "Residuals",
     col = "seagreen")

# The histogram roughly forms a bell-shaped curve centered around 0, however it is slightly higher left of the zero, indicating the histogram is slightly skewed to the right.

plot(fitted(ses_model), residuals(ses_model),
     main = "Residuals vs Fitted Values",
     xlab = "Fitted Values",
     ylab = "Residuals",
     col = "steelblue", pch = 19)
abline(h = 0, col = "red", lwd = 2)

# The plot forms a sideways fan-shaped, where the low fitted values are tightly clustered around 0 and high fitted values are spread out both negatively and positively. This basically indicates that the variance of residuals is not constant because there is small variance at lower fitted values and large variance at higher fitted values.

plot(series, residuals(ses_model),
     main = "Residuals vs Actual Values",
     xlab = "Actual Values",
     ylab = "Residuals",
     col = "navy", pch = 19)
abline(h = 0, col = "red", lwd = 2)

# This plot also fans sideways, where the variance increases with higher actual values indicating smaller variance at lower actual values and larger variance at higher actual values.

Acf(ses_residuals)

# ACF plot has spikes between the blue bands except for 2 spikes, this indicates that the residuals are uncorrelated, therefore the model is an adequate fit.

accuracy(ses_model)
##                      ME      RMSE       MAE      MPE     MAPE      MASE
## Training set -0.0040926 0.7368673 0.4534637 -1.05908 9.947884 0.3129002
##                      ACF1
## Training set -0.000732753
ses_forecast <- ses(series,12)
ses_forecast
##          Point Forecast       Lo 80    Hi 80      Lo 95    Hi 95
## Sep 2020       2.283018  1.33534203 3.230694  0.8336726 3.732364
## Oct 2020       2.283018  0.96392686 3.602110  0.2656421 4.300394
## Nov 2020       2.283018  0.67618472 3.889852 -0.1744215 4.740458
## Dec 2020       2.283018  0.43265994 4.133376 -0.5468606 5.112897
## Jan 2021       2.283018  0.21765200 4.348384 -0.8756868 5.441723
## Feb 2021       2.283018  0.02300728 4.543029 -1.1733702 5.739407
## Mar 2021       2.283018 -0.15615404 4.722190 -1.4473738 6.013410
## Apr 2021       2.283018 -0.32302729 4.889064 -1.7025844 6.268621
## May 2021       2.283018 -0.47983991 5.045876 -1.9424086 6.508445
## Jun 2021       2.283018 -0.62821810 5.194255 -2.1693335 6.735370
## Jul 2021       2.283018 -0.76939210 5.335429 -2.3852405 6.951277
## Aug 2021       2.283018 -0.90431932 5.470356 -2.5915939 7.157630
plot(ses_forecast)

# The MAPE for this type of forecast is 9.947884 which is less than 10%, so this is a good indicator that the simple exponential smoothing forecast is performing well and the data does not have extremely small values that distort the percentages.

# In 1 year, the time series value for the natural gas prices in August 2021 will be $2.28.

# The forecast is taking the last value and predicting the values for the next whole year.

# Holt-Winters
HW_model <- HoltWinters(series)
plot(HW_model)

HW_model$alpha
## alpha 
##     1
# The alpha = 1, therefore the level updates very quickly and the dataset is highly sensitive to recent changes in values.

HW_model$beta
## beta 
##    0
HW_model$gamma
## gamma 
##     0
# Beta and Gamma for this dataset are equal to 0. The beta component being equal to 0 means that there is no meaningful trend. The trend does not change over time and stays constant. The gamma component being equal to 0 means that the model does not update the seasonal indices as the new data comes in. This can also indicate that the seasonality is strong yet stable over time, so it stays constant.

HW_model$coef
##           a           b          s1          s2          s3          s4 
##  2.27725694 -0.02418561  0.39482639  0.55357639  0.48149306 -0.17267361 
##          s5          s6          s7          s8          s9         s10 
## -0.43059028 -0.26309028 -0.19059028  0.08357639 -0.12100694 -0.02767361 
##         s11         s12 
## -0.33059028  0.02274306
# a = 2.27725694 is the initial level of the series at the start. This is the baseline around which the series oscillates. 
# b = -0.02418561 is the initial trend. There is a slight negative trend at the start and the series decreases very slowly.
# s1 - s12 values are the seasonal indices for months 1 to 12. These values basically represent the monthly effect of each month relative to the level indicated by a. Positive values are above baseline and negative values are below baseline.

HW_model$sigma
## NULL
# Sigma in this case is NULL. So, the value has not been defined at all.

checkresiduals(HW_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from HoltWinters
## Q* = 61.754, df = 24, p-value = 3.596e-05
## 
## Model df: 0.   Total lags used: 24
HW_residuals <- residuals(HW_model)
HW_residuals
##                Jan           Feb           Mar           Apr           May
## 1998  2.732639e-02 -3.314394e-03 -3.831439e-02 -5.998106e-02 -6.123106e-02
## 1999  4.121023e-01 -2.233144e-01 -2.831439e-02  1.100189e-01  3.387689e-01
## 2000  3.421023e-01  9.668561e-02  8.168561e-02  1.893939e-05  7.787689e-01
## 2001 -4.478977e-01 -2.703314e+00 -4.283144e-01 -2.899811e-01 -7.712311e-01
## 2002  3.021023e-01 -1.433144e-01  6.616856e-01  1.500189e-01  2.987689e-01
## 2003  9.721023e-01  2.136686e+00 -1.828314e+00 -9.199811e-01  7.787689e-01
## 2004  2.921023e-01 -9.133144e-01 -2.831439e-02  7.001894e-02  8.487689e-01
## 2005 -1.478977e-01 -1.533144e-01  7.716856e-01 -4.998106e-02 -4.612311e-01
## 2006 -4.077898e+00 -1.293314e+00 -6.983144e-01  2.001894e-02 -6.812311e-01
## 2007  1.021023e-01  1.306686e+00 -9.383144e-01  2.400189e-01  2.687689e-01
## 2008  1.162102e+00  4.066856e-01  8.216856e-01  5.200189e-01  1.318769e+00
## 2009 -2.978977e-01 -8.633144e-01 -6.083144e-01 -7.099811e-01  5.587689e-01
## 2010  7.621023e-01 -6.533144e-01 -1.078314e+00 -5.099811e-01  3.387689e-01
## 2011  5.221023e-01 -5.433144e-01 -1.683144e-01  2.001894e-02  2.987689e-01
## 2012 -2.178977e-01 -3.033144e-01 -3.883144e-01 -4.699811e-01  7.087689e-01
## 2013  2.721023e-01 -1.433144e-01  4.316856e-01  1.100189e-01  9.876894e-02
## 2014  7.521023e-01  1.146686e+00 -1.148314e+00 -4.899811e-01  1.487689e-01
## 2015 -2.078977e-01 -2.633144e-01 -8.831439e-02 -4.699811e-01  4.687689e-01
## 2016  6.321023e-01 -4.333144e-01 -3.083144e-01 -5.998106e-02  2.287689e-01
## 2017 -7.897727e-03 -5.933144e-01 -1.831439e-02 -2.998106e-02  2.787689e-01
## 2018  1.332102e+00 -1.343314e+00 -2.831439e-02 -1.399811e-01  2.287689e-01
## 2019 -6.478977e-01 -5.633144e-01  2.116856e-01 -5.499811e-01  2.187689e-01
## 2020  8.210227e-02 -2.533144e-01 -1.683144e-01 -2.999811e-01  2.387689e-01
##                Jun           Jul           Aug           Sep           Oct
## 1998 -3.914773e-02  3.271023e-01 -6.491477e-01 -1.778977e-01 -2.445644e-01
## 1999 -2.914773e-02  3.371023e-01  1.608523e-01 -5.978977e-01  4.543561e-02
## 2000  6.308523e-01  2.710227e-02  1.108523e-01  2.821023e-01 -1.745644e-01
## 2001 -5.391477e-01 -2.828977e-01 -4.691477e-01 -1.127898e+00  1.354356e-01
## 2002 -3.091477e-01  5.710227e-02 -2.291477e-01  1.121023e-01  4.454356e-01
## 2003 -5.914773e-02 -4.628977e-01 -3.691477e-01 -7.178977e-01 -1.245644e-01
## 2004 -1.291477e-01 -1.289773e-02 -8.491477e-01 -6.078977e-01  1.065436e+00
## 2005  6.408523e-01  7.771023e-01  1.570852e+00  1.872102e+00  1.535436e+00
## 2006 -1.091477e-01  2.871023e-01  6.408523e-01 -2.587898e+00  8.154356e-01
## 2007 -3.591477e-01 -8.028977e-01 -3.291477e-01 -4.878977e-01  5.254356e-01
## 2008  1.350852e+00 -1.272898e+00 -3.159148e+00 -9.378977e-01 -1.064564e+00
## 2009 -9.914773e-02 -9.289773e-02 -5.691477e-01 -4.978977e-01  8.854356e-01
## 2010  5.908523e-01  1.571023e-01 -6.391477e-01 -7.778977e-01 -5.945644e-01
## 2011  1.608523e-01  2.071023e-01 -6.891477e-01 -5.078977e-01 -4.645644e-01
## 2012 -3.914773e-02  8.171023e-01 -4.391477e-01 -3.378977e-01  3.354356e-01
## 2013 -2.791477e-01  1.171023e-01 -5.191477e-01 -1.578977e-01 -7.456439e-02
## 2014 -5.914773e-02 -2.128977e-01 -4.691477e-01 -3.378977e-01 -2.745644e-01
## 2015 -1.391477e-01  3.871023e-01 -3.991477e-01 -4.578977e-01 -4.545644e-01
## 2016  6.008523e-01  5.571023e-01 -3.291477e-01 -1.778977e-01 -1.445644e-01
## 2017 -2.391477e-01  3.271023e-01 -4.091477e-01 -2.678977e-01 -2.345644e-01
## 2018  1.008523e-01  1.871023e-01 -1.991477e-01 -3.078977e-01  1.454356e-01
## 2019 -3.091477e-01  2.971023e-01 -4.791477e-01 -7.897727e-03 -3.645644e-01
## 2020 -1.891477e-01  4.671023e-01  2.008523e-01                            
##                Nov           Dec
## 1998  3.062689e-01  2.783523e-01
## 1999 -2.637311e-01  6.683523e-01
## 2000  5.962689e-01  4.058352e+00
## 2001 -2.373106e-02  6.383523e-01
## 2002  6.268939e-03  1.378352e+00
## 2003 -6.373106e-02  2.338352e+00
## 2004 -8.373106e-02  1.088352e+00
## 2005 -3.023731e+00  3.428352e+00
## 2006  1.656269e+00 -1.647727e-03
## 2007  4.562689e-01  6.883523e-01
## 2008  3.626894e-02 -1.816477e-01
## 2009 -2.537311e-01  2.368352e+00
## 2010  3.762689e-01  1.218352e+00
## 2011 -2.337311e-01  6.083523e-01
## 2012  3.162689e-01  4.783523e-01
## 2013  5.626894e-02  1.278352e+00
## 2014  4.362689e-01  3.835227e-02
## 2015 -1.537311e-01  5.183523e-01
## 2016 -3.337311e-01  1.718352e+00
## 2017  2.262689e-01  4.883523e-01
## 2018  9.062689e-01  6.283523e-01
## 2019  4.162689e-01  2.483523e-01
## 2020
plot(HW_residuals,
     main = "Residuals from Holt-Winters Forecast for Natural Gas Prices",
     ylab = "Residuals", xlab = "Time",
     col = "blue", type = "l")
abline(h = 0, col = "red")

# The plot indicates that the model captured the main pattern of the model because there are random fluctuations around 0 except for the few years that had extremely high spikes. 

hist(HW_residuals,
     main = "Histogram of Residuals",
     xlab = "Residuals",
     col = "lightblue")

# The histogram roughly forms a bell-shaped curve centered around 0, however it is slightly higher left of the zero, indicating the histogram is slightly skewed to the right.

plot(fitted(HW_model), residuals(HW_model),
     main = "Residuals vs Fitted Values",
     xlab = "Fitted Values",
     ylab = "Residuals",
     col = "steelblue", pch = 19)
abline(h = 0, col = "red", lwd = 2)

# The xhat plot follows the shape of the actual data very closely, therefore it can be interpreted that the model captures the overall pattern well. The level section mirrors the xhat plot but it is smoother and decreases gradually over time. It can be noted that the natural gas prices dropped in the later periods. The trend line is flat meaning the model does not have a strong upward or downward growth, therefore it stays constant. The seasonality oscillates regularly between -0.4 and 0.4. The pattern is consistent indicating that the seasonality is stable and strong over time.

plot(series, residuals(HW_model),
     main = "Residuals vs Actual Values",
     xlab = "Actual Values",
     ylab = "Residuals",
     col = "navy", pch = 19)
abline(h = 0, col = "red", lwd = 2)

# This plot fans sideways, where the variance increases with higher actual values indicating smaller variance at lower actual values and larger variance at higher actual values.

Acf(HW_residuals)

# ACF plot has most of the spikes between the blue bands except for the higher spikes on the negative portion which exceeds the blue band. Overall, the graph indicates that the residuals are uncorrelated and so the model is an adequate fit.

HW_forecast <- forecast(HW_model,12)
HW_forecast
##          Point Forecast       Lo 80    Hi 80       Lo 95    Hi 95
## Sep 2020       2.647898  1.59826928 3.697526  1.04262955 4.253166
## Oct 2020       2.782462  1.29806334 4.266861  0.51227009 5.052654
## Nov 2020       2.686193  0.86818339 4.504203 -0.09421287 5.466599
## Dec 2020       2.007841 -0.09141598 4.107098 -1.20269545 5.218377
## Jan 2021       1.725739 -0.62130192 4.072779 -1.86375014 5.315227
## Feb 2021       1.869053 -0.70200108 4.440107 -2.06303491 5.801141
## Mar 2021       1.917367 -0.85968841 4.694423 -2.32977297 6.164508
## Apr 2021       2.167348 -0.80144908 5.136146 -2.37303558 6.707733
## May 2021       1.938580 -1.21030579 5.087465 -2.87722499 6.754384
## Jun 2021       2.007727 -1.31148931 5.326944 -3.06857643 7.084031
## Jul 2021       1.680625 -1.80059872 5.161849 -3.64344724 7.004697
## Aug 2021       2.009773 -1.62624686 5.645792 -3.55103937 7.570585
accuracy(HW_forecast)
##                      ME      RMSE       MAE        MPE     MAPE      MASE
## Training set 0.02330255 0.8178545 0.5353015 -0.2926888 12.28121 0.3693701
##                    ACF1
## Training set 0.01411268
plot(HW_forecast)

# The MAPE for this type of forecast is 12.28121 which is more than 10% but less than 20%, so this is a good indicator that the Holt Winters forecast is still performing good. The model captures overall trend and level reasonably well and it has an acceptable level of accuracy.

# In 1 year, the time series value for the natural gas prices in August 2021 will be $2.01.

# The forecast is mimicking the original plot, so it is adding some noise back into the forecast, therefore the forecast takes the trend of the overall plot of the actual dataset, then it looks into the next year's forecast. 

# Decomposition
stl_decomp <- stl(series,s.window ="periodic")
plot(stl_decomp)

# stl_decomp
attributes(stl_decomp)
## $names
## [1] "time.series" "weights"     "call"        "win"         "deg"        
## [6] "jump"        "inner"       "outer"      
## 
## $class
## [1] "stl"
# Yes the time series is seasonal because the seasonality plot spikes at regular intervals in a cyclical pattern.

decomp_natural_gas_prices <- decompose(series) 
decomp_natural_gas_prices
## $x
##        Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov   Dec
## 1997  3.45  2.15  1.89  2.03  2.25  2.20  2.19  2.49  2.88  3.07  3.01  2.35
## 1998  2.09  2.23  2.24  2.43  2.14  2.17  2.17  1.85  2.02  1.91  2.12  1.72
## 1999  1.85  1.77  1.79  2.15  2.26  2.30  2.31  2.80  2.55  2.73  2.37  2.36
## 2000  2.42  2.66  2.79  3.04  3.59  4.29  3.99  4.43  5.06  5.02  5.52  8.90
## 2001  8.17  5.61  5.23  5.19  4.19  3.72  3.11  2.97  2.19  2.46  2.34  2.30
## 2002  2.32  2.32  3.03  3.43  3.50  3.26  2.99  3.09  3.55  4.13  4.04  4.74
## 2003  5.43  7.71  5.93  5.26  5.81  5.82  5.03  4.99  4.62  4.63  4.47  6.13
## 2004  6.14  5.37  5.39  5.71  6.33  6.27  5.93  5.41  5.15  6.35  6.17  6.58
## 2005  6.15  6.14  6.96  7.16  6.47  7.18  7.63  9.53 11.75 13.42 10.30 13.05
## 2006  8.69  7.54  6.89  7.16  6.25  6.21  6.17  7.14  4.90  5.85  7.41  6.73
## 2007  6.55  8.00  7.11  7.60  7.64  7.35  6.22  6.22  6.08  6.74  7.10  7.11
## 2008  7.99  8.54  9.41 10.18 11.27 12.69 11.09  8.26  7.67  6.74  6.68  5.82
## 2009  5.24  4.52  3.96  3.50  3.83  3.80  3.38  3.14  2.99  4.01  3.66  5.35
## 2010  5.83  5.32  4.29  4.03  4.14  4.80  4.63  4.32  3.89  3.43  3.71  4.25
## 2011  4.49  4.09  3.97  4.24  4.31  4.54  4.42  4.06  3.90  3.57  3.24  3.17
## 2012  2.67  2.51  2.17  1.95  2.43  2.46  2.95  2.84  2.85  3.32  3.54  3.34
## 2013  3.33  3.33  3.81  4.17  4.04  3.83  3.62  3.43  3.62  3.68  3.64  4.24
## 2014  4.71  6.00  4.90  4.66  4.58  4.59  4.05  3.91  3.92  3.78  4.12  3.48
## 2015  2.99  2.87  2.83  2.61  2.85  2.78  2.84  2.77  2.66  2.34  2.09  1.93
## 2016  2.28  1.99  1.73  1.92  1.92  2.59  2.82  2.82  2.99  2.98  2.55  3.59
## 2017  3.30  2.85  2.88  3.10  3.15  2.98  2.98  2.90  2.98  2.88  3.01  2.82
## 2018  3.87  2.67  2.69  2.80  2.80  2.97  2.83  2.96  3.00  3.28  4.09  4.04
## 2019  3.11  2.69  2.95  2.65  2.64  2.40  2.37  2.22  2.56  2.33  2.65  2.22
## 2020  2.02  1.91  1.79  1.74  1.75  1.63  1.77  2.30                        
## 
## $seasonal
##               Jan          Feb          Mar          Apr          May
## 1997  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 1998  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 1999  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2000  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2001  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2002  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2003  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2004  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2005  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2006  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2007  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2008  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2009  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2010  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2011  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2012  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2013  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2014  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2015  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2016  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2017  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2018  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2019  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
## 2020  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
##               Jun          Jul          Aug          Sep          Oct
## 1997  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 1998  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 1999  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2000  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2001  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2002  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2003  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2004  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2005  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2006  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2007  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2008  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2009  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2010  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2011  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2012  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2013  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2014  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2015  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2016  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2017  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2018  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2019  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## 2020  0.138795496 -0.115762311 -0.163606514                          
##               Nov          Dec
## 1997 -0.018244195  0.348476820
## 1998 -0.018244195  0.348476820
## 1999 -0.018244195  0.348476820
## 2000 -0.018244195  0.348476820
## 2001 -0.018244195  0.348476820
## 2002 -0.018244195  0.348476820
## 2003 -0.018244195  0.348476820
## 2004 -0.018244195  0.348476820
## 2005 -0.018244195  0.348476820
## 2006 -0.018244195  0.348476820
## 2007 -0.018244195  0.348476820
## 2008 -0.018244195  0.348476820
## 2009 -0.018244195  0.348476820
## 2010 -0.018244195  0.348476820
## 2011 -0.018244195  0.348476820
## 2012 -0.018244195  0.348476820
## 2013 -0.018244195  0.348476820
## 2014 -0.018244195  0.348476820
## 2015 -0.018244195  0.348476820
## 2016 -0.018244195  0.348476820
## 2017 -0.018244195  0.348476820
## 2018 -0.018244195  0.348476820
## 2019 -0.018244195  0.348476820
## 2020                          
## 
## $trend
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1997       NA       NA       NA       NA       NA       NA 2.440000 2.386667
## 1998 2.440000 2.412500 2.350000 2.265833 2.180417 2.117083 2.080833 2.051667
## 1999 1.998333 2.043750 2.105417 2.161667 2.206250 2.243333 2.293750 2.354583
## 2000 2.895833 3.033750 3.206250 3.406250 3.632917 4.036667 4.548750 4.911250
## 2001 5.382500 5.285000 5.104583 4.878333 4.639167 4.231667 3.712917 3.332083
## 2002 2.764167 2.764167 2.825833 2.952083 3.092500 3.265000 3.496250 3.850417
## 2003 4.960000 5.124167 5.247917 5.313333 5.352083 5.427917 5.515417 5.447500
## 2004 5.460833 5.515833 5.555417 5.649167 5.791667 5.881250 5.900417 5.932917
## 2005 6.375000 6.617500 7.064167 7.633750 8.100417 8.542083 8.917500 9.081667
## 2006 8.974167 8.813750 8.428750 7.827917 7.392083 7.008333 6.655833 6.585833
## 2007 6.872917 6.836667 6.847500 6.933750 6.957917 6.960833 7.036667 7.119167
## 2008 8.498750 8.786667 8.937917 9.004167 8.986667 8.915417 8.747083 8.465000
## 2009 5.604583 5.070000 4.661667 4.352917 4.113333 3.967917 3.972917 4.030833
## 2010 4.297083 4.398333 4.485000 4.498333 4.476250 4.432500 4.330833 4.223750
## 2011 4.147083 4.127500 4.117083 4.123333 4.109583 4.045000 3.924167 3.782500
## 2012 2.984583 2.872500 2.777917 2.723750 2.725833 2.745417 2.780000 2.841667
## 2013 3.473750 3.526250 3.582917 3.630000 3.649167 3.690833 3.785833 3.954583
## 2014 4.323750 4.361667 4.394167 4.410833 4.435000 4.423333 4.320000 4.117917
## 2015 3.298750 3.200833 3.100833 2.988333 2.843750 2.694583 2.600417 2.534167
## 2016 2.254167 2.255417 2.271250 2.311667 2.357500 2.445833 2.557500 2.635833
## 2017 3.007500 3.017500 3.020417 3.015833 3.030833 3.017917 3.009583 3.025833
## 2018 2.941250 2.937500 2.940833 2.958333 3.020000 3.115833 3.135000 3.104167
## 2019 3.034167 2.984167 2.935000 2.877083 2.777500 2.641667 2.520417 2.442500
## 2020 2.074167 2.052500       NA       NA       NA       NA       NA       NA
##           Sep      Oct      Nov      Dec
## 1997 2.404583 2.435833 2.447917 2.442083
## 1998 2.013750 1.983333 1.976667 1.987083
## 1999 2.433333 2.512083 2.604583 2.742917
## 2000 5.135833 5.327083 5.441667 5.442917
## 2001 3.103333 2.938333 2.836250 2.788333
## 2002 4.195833 4.392917 4.565417 4.768333
## 2003 5.327500 5.323750 5.364167 5.404583
## 2004 6.030417 6.156250 6.222500 6.266250
## 2005 9.137083 9.134167 9.125000 9.075417
## 2006 6.614167 6.641667 6.717917 6.823333
## 2007 7.237500 7.440833 7.699583 8.073333
## 2008 8.070417 7.565000 6.976667 6.296250
## 2009 4.077917 4.113750 4.148750 4.203333
## 2010 4.159167 4.154583 4.170417 4.166667
## 2011 3.641667 3.471250 3.297500 3.132500
## 2012 2.944167 3.105000 3.264583 3.388750
## 2013 4.111250 4.177083 4.220000 4.274167
## 2014 3.901250 3.729583 3.572083 3.424583
## 2015 2.451667 2.377083 2.309583 2.262917
## 2016 2.719583 2.816667 2.917083 2.984583
## 2017 3.010417 2.990000 2.962917 2.947917
## 2018 3.115833 3.120417 3.107500 3.077083
## 2019 2.361667 2.275417 2.200417 2.131250
## 2020                                    
## 
## $random
##                Jan           Feb           Mar           Apr           May
## 1997            NA            NA            NA            NA            NA
## 1998 -0.5011398633 -0.2043101532  0.0312234437  0.2129279891 -0.0482841321
## 1999 -0.2994731966 -0.2955601532 -0.1741932230  0.0370946558  0.0458825346
## 2000 -0.6269731966 -0.3955601532 -0.2750265563 -0.3174886775 -0.0507841321
## 2001  2.6363601367  0.3031898468  0.2666401103  0.3604279891 -0.4570341321
## 2002 -0.5953065300 -0.4659768198  0.3453901103  0.5266779891  0.3996325346
## 2003  0.3188601367  2.5640231802  0.8233067770 -0.0045720109  0.4500492013
## 2004  0.5280268034 -0.1676434865 -0.0241932230  0.1095946558  0.5304658679
## 2005 -0.3761398633 -0.4993101532  0.0370567770 -0.4249886775 -1.6382841321
## 2006 -0.4353065300 -1.2955601532 -1.3975265563 -0.6191553442 -1.1499507987
## 2007 -0.4740565300  1.1415231802  0.4037234437  0.7150113225  0.6742158679
## 2008 -0.6598898633 -0.2684768198  0.6133067770  1.2245946558  2.2754658679
## 2009 -0.5157231966 -0.5718101532 -0.5604432230 -0.8041553442 -0.2912007987
## 2010  1.3817768034  0.8998565135 -0.0537765563 -0.4195720109 -0.3441174654
## 2011  0.1917768034 -0.0593101532 -0.0058598897  0.1654279891  0.1925492013
## 2012 -0.4657231966 -0.3843101532 -0.4666932230 -0.7249886775 -0.3037007987
## 2013 -0.2948898633 -0.2180601532  0.3683067770  0.5887613225  0.3829658679
## 2014  0.2351101367  1.6165231802  0.6470567770  0.2979279891  0.1371325346
## 2015 -0.4598898633 -0.3526434865 -0.1296098897 -0.3295720109 -0.0016174654
## 2016 -0.1253065300 -0.2872268198 -0.4000265563 -0.3429053442 -0.4453674654
## 2017  0.1413601367 -0.1893101532  0.0008067770  0.1329279891  0.1112992013
## 2018  0.7776101367 -0.2893101532 -0.1096098897 -0.1095720109 -0.2278674654
## 2019 -0.0753065300 -0.3159768198  0.1562234437 -0.1783220109 -0.1453674654
## 2020 -0.2053065300 -0.1643101532            NA            NA            NA
##                Jun           Jul           Aug           Sep           Oct
## 1997            NA -0.1342376894  0.2669398468  0.6718854990  0.6181898468
## 1998 -0.0858788291  0.2049289773 -0.0380601532  0.2027188323 -0.0893101532
## 1999 -0.0821288291  0.1320123106  0.6090231802  0.3131354990  0.2019398468
## 2000  0.1145378376 -0.4429876894 -0.3176434865  0.1206354990 -0.3230601532
## 2001 -0.6504621624 -0.4871543561 -0.1984768198 -0.7168645010 -0.4943101532
## 2002 -0.1437954957 -0.3904876894 -0.5968101532 -0.4493645010 -0.2788934865
## 2003  0.2532878376 -0.3696543561 -0.2938934865 -0.5110311677 -0.7097268198
## 2004  0.2499545043  0.1453456439 -0.3593101532 -0.6839478343  0.1777731802
## 2005 -1.5008788291 -1.1717376894  0.6119398468  2.8093854990  4.2698565135
## 2006 -0.9371288291 -0.3700710227  0.7177731802 -1.5176978343 -0.8076434865
## 2007  0.2503711709 -0.7009043561 -0.7355601532 -0.9610311677 -0.7168101532
## 2008  3.6357878376  2.4586789773 -0.0413934865 -0.2039478343 -0.8409768198
## 2009 -0.3067121624 -0.4771543561 -0.7272268198 -0.8914478343 -0.1197268198
## 2010  0.2287045043  0.4149289773  0.2598565135 -0.0726978343 -0.7405601532
## 2011  0.3562045043  0.6115956439  0.4411065135  0.4548021657  0.0827731802
## 2012 -0.4242121624  0.2857623106  0.1619398468  0.1023021657  0.1990231802
## 2013  0.0003711709 -0.0500710227 -0.3609768198 -0.2947811677 -0.5130601532
## 2014  0.0278711709 -0.1542376894 -0.0443101532  0.2152188323  0.0344398468
## 2015 -0.0533788291  0.3553456439  0.3994398468  0.4048021657 -0.0530601532
## 2016  0.0053711709  0.3782623106  0.3477731802  0.4668854990  0.1473565135
## 2017 -0.1767121624  0.0861789773  0.0377731802  0.1660521657 -0.1259768198
## 2018 -0.2846288291 -0.1892376894  0.0194398468  0.0806354990  0.1436065135
## 2019 -0.3804621624 -0.0346543561 -0.0588934865  0.3948021657  0.0386065135
## 2020            NA            NA            NA                            
##                Nov           Dec
## 1997  0.5803275280 -0.4405601532
## 1998  0.1615775280 -0.6155601532
## 1999 -0.2163391387 -0.7313934865
## 2000  0.0965775280  3.1086065135
## 2001 -0.4780058053 -0.8368101532
## 2002 -0.5071724720 -0.3768101532
## 2003 -0.8759224720  0.3769398468
## 2004 -0.0342558053 -0.0347268198
## 2005  1.1932441947  3.6261065135
## 2006  0.7103275280 -0.4418101532
## 2007 -0.5813391387 -1.3118101532
## 2008 -0.2784224720 -0.8247268198
## 2009 -0.4705058053  0.7981898468
## 2010 -0.4421724720 -0.2651434865
## 2011 -0.0392558053 -0.3109768198
## 2012  0.2936608613 -0.3972268198
## 2013 -0.5617558053 -0.3826434865
## 2014  0.5661608613 -0.2930601532
## 2015 -0.2013391387 -0.6813934865
## 2016 -0.3488391387  0.2569398468
## 2017  0.0653275280 -0.4763934865
## 2018  1.0007441947  0.6144398468
## 2019  0.4678275280 -0.2597268198
## 2020                            
## 
## $figure
##  [1]  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
##  [6]  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## [11] -0.018244195  0.348476820
## 
## $type
## [1] "additive"
## 
## attr(,"class")
## [1] "decomposed.ts"
# The decomposition is additive.

monthly_indices <- decomp_natural_gas_prices$seasonal[1:12]
monthly_indices
##  [1]  0.151139863  0.021810153 -0.141223444 -0.048761322  0.007867465
##  [6]  0.138795496 -0.115762311 -0.163606514 -0.196468832  0.015976820
## [11] -0.018244195  0.348476820
# The monthly indices are: 0.151139863, 0.021810153, -0.141223444, -0.048761322, 0.007867465, 0.138795496, -0.115762311, -0.163606514, -0.196468832, 0.015976820, -0.018244195, 0.348476820

# The monthly time series value for December is the highest (0.348476820) and September is the lowest (-0.196468832). 

# The time series value for December is the highest because it is peak winter season therefore due to lower temperatures, the demand for natural gas increases due to high demand in heating. The time series value is lowest in September because it is the start of fall so it is not cold enough for the heating systems, therefore the demand for natural gas is lower. 

seasadj <- seasadj(stl_decomp)
plot(seasadj)
lines(series, col="Red")

# Seasonality does not have big fluctuations in the values of the time series because it is stable so even without seasonality, the graph follows the original time series plot.

# Accuracy
Naive <- accuracy(naive_model)
MA3 <- accuracy(MA3_f)
MA6 <- accuracy(MA6_f)
MA9 <- accuracy(MA9_f)
SES <- accuracy(ses_model)
HoltWinters <- accuracy(HW_forecast)

models <- list(
  Naive = Naive,
  MA3 = MA3,
  MA6 = MA6,
  MA9 = MA9,
  SES = SES,
  HoltWinters = HoltWinters
)

accuracy_table <- do.call(rbind, lapply(names(models), function(name) {
  acc <- models[[name]][1, c("RMSE", "MAE", "MAPE")]
  df <- data.frame(
    Model = name,
    RMSE = acc["RMSE"],
    MAE  = acc["MAE"],
    MAPE = acc["MAPE"],
    stringsAsFactors = FALSE
  )
  rownames(df) <- NULL
  return(df)
}))

rownames(accuracy_table) <- NULL
accuracy_table
# Naive forecast: it is a benchmarking model, it looks at the last available data and places all weight onto the most recent data. It takes the last value to forecast the future values.

# Moving Average: it takes a fixed number of data points of the order k and uses them to smooth the series and forecast future values. 

# Simple Exponential Smoothing: it places a weight between 0 to 1 to all of the data points where the the weight placed on the older data exponentially decreases, therefore giving more weight to the recent observed values.

# Holt Winters: it is similar to SES, however this adds level which is a baseline value of the series, trend which is the direction and speed of change, and seasonality or repeating patterns at regular intervals to the model.

# Based on the accuracy measures except for the Holt Winters model, all the other models have a MAPE that is less than 10%, which means that all the other models are an accurate fit to the dataset. In this case, we have the ability to pick the model that best fits our business needs. However, since the moving average model as the lowest MAPE relatively speaking, it can be considered to be the best fit for our data with the smallest amount of error. The predicted natural gas price for August 2021 is $1.95 based on the moving average model. As mentioned earlier, the worst model fit for this dataset would be the Holt Winters because the trend and seasonality for this dataset are constant therefore this model does not provide accurate forecast of the future values.

# Conclusion
# Trend: Natural gas prices exhibit irregular spikes during extreme winters (2001), hurricanes (2005), and economic shocks (2009), apart from this the dataset has a relatively stable trend with no variability.
# Seasonality: it is stable, peaking in December due to the high demands, and lowest in September.
# Distribution & Skewness: series is right-skewed, and variability increases at higher costs. 
# Autocorrelation: residuals are uncorrelated indicating adequate model fit for all of the models.

# Based on the analysis and forecast, the moving average model suggests that the forecasted value for natural gas prices in 12 months will be $1.95 which is slightly less than the recent values but it is consistent with the absence of trend. In the next 2 years, the trend and seasonality are expected to be stable indicating that the forecasted values will stay within the same range as the current values unless external shocks cause significant spikes in the data. 

# Model Ranking for this dataset:
# 1. Moving Average with Order = 3: best fit to historical data, smooth but responsive
# 2. Naive: works well beyond the benchmarking model because the series is pretty stable.
# 3. Simple Exponential Smoothing (SES): good fit, reacts quickly to recent changes (high alpha)
# 4. Moving Average with Order = 6 and 9: too smooth, the details of spikes are lost, therefore it has higher error. 
# 5. Holt-Winters: worst for this dataset, because trend and seasonality are constant this model is not useful.